In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as S
import statsmodels.api as sm
import pingouin as pg

import re
import json
import os

from sklearn.metrics import roc_auc_score, confusion_matrix, roc_curve, precision_recall_curve, auc, average_precision_score, f1_score, PrecisionRecallDisplay
from imblearn.under_sampling import RandomUnderSampler
from imblearn.metrics import geometric_mean_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
import pyper
import datetime

from typing import Tuple

import statsmodels.api as sm
In [2]:
# Mocks for compatibility reasons
np.int = int
np.float = float
np.bool = bool
In [3]:
class ALTERN(int):
    OTHER = 0b00
    WILDTYPE = 0b01
    MIXED = 0b10
    PERMUTATION = 0b100
    INSERT = 0b1000
    DELETION = 0b10000
    REPETITION = 0b100000
    
    def __repr__(self):
        texts = []
        if self == self.OTHER:
            return "OTHER"
        if self & self.WILDTYPE:
            texts.append("WILDTYPE")
        if self & self.MIXED:
            texts.append("MIXED")
        if self & self.PERMUTATION:
            texts.append("PERMUTATION")
        if self & self.INSERT:
            texts.append("INSERT")
        if self & self.DELETION:
            texts.append("DELETION")
        if self & self.REPETITION:
            texts.append("REPETITION")
        return f'ALTERN({" + ".join(texts)})'
In [4]:
def exon_filter(x, exon: Tuple[str, list[int]], altern: ALTERN):
    if not isinstance(x, list):
        return False
    name, exons = exon
    for item in x:
        if not (item["altern"] & altern): continue
        available_exons = item['exons'].get(name, [])
        for e in available_exons:
            if e in exons: return True
    return False
In [5]:
def d842_permutation_filter(x):
    if isinstance(x, str):
        x = json.loads(x)
    if not isinstance(x, list):
        return False
    for item in x:
        available_exons = item['exons'].get('pdgfra', [])
        if 18 in available_exons:
            for codon in item.get('codons_pairs', []):
                available_codons = codon['codons']
                if 842 in available_codons and codon['altern'] & ALTERN.PERMUTATION:
                    return True
    return False
In [6]:
def pr_auc_score(y_true, y_pred):
    precision, recall, _ = precision_recall_curve(y_true, y_pred)
    return auc(recall, precision)

Load data¶

In [7]:
df2 = pd.read_csv("data.csv.gz", index_col=0)
df_rad = pd.read_csv("radiomics.csv.gz", index_col=0)

Baseline characteristics¶

In [8]:
_location_sort = {"胃": 0, "十二指肠": 1, "小肠": 2, "直肠": 3, "系膜": 4, "后腹膜": 5, "定位困难": 6, "其它": 7}
_location_rename = {
    "胃": "Stomach",
    "十二指肠": "Duodenum",
    "小肠": "Small intestine",
    "直肠": "Rectum",
    "系膜": "Mesenterium",
    "网膜": "Periterium",
    "后腹膜": "Retroperiterium",
    "定位困难": "Difficult to locate",
    "其它": "Others",
}

genes_of_interest = [
    ("kit", [9]),
    ("kit", [11]),
    ("kit", [13]),
    ("kit", [17]),
    ("pdgfra", [12]),
    #("pdgfra", [14]),
    ("pdgfra", [18]),
]
gene_cols = [f"custom_{i[0]}{i[1][0]}" for i in genes_of_interest]
In [9]:
def t_test(value_col, table, filter_vals=[True, False], filter_col="custom_train"):
    args = [table[table[filter_col] == f][value_col] for f in filter_vals]
    return S.ttest_ind(*args)[1]

def contingency(table):
    if np.min(table) > 5:
        return S.chi2_contingency(table)
    elif table.shape == (2, 2):
        return S.fisher_exact(table)
    else:
        return [np.nan, np.nan]

def _format_p(p: float):
    return "%.3f" % p if p >= 0.001 else "< 0.001"

def baseline(
    data=df_rad,
    group_col="custom_train",
    groups=[True, False],
    group_names=["Training cohort", "Validation cohort"],
    gene_mutation=True,
):
    results = pd.DataFrame()
    _size = data.groupby(group_col).size()
    _name_map = dict(zip(groups, group_names))

    # Age
    _age_mean = data.groupby(group_col)["custom_age"].mean()
    _age_std = data.groupby(group_col)["custom_age"].std()
    _age_p_value = t_test("custom_age", data, filter_col=group_col, filter_vals=groups)
    _age_p = _format_p(_age_p_value)
    _age = pd.concat([_age_mean, _age_std], axis=1).apply(lambda r: "%.2f ± %.2f" % (r.iloc[0], r.iloc[1]), axis=1)
    _age.loc["p"] = _age_p
    _age = _age.rename("Age (years)")
    # Gender
    _gender_data = data.groupby(["custom_gender", group_col]).size().reset_index()
    _gender_pivot = _gender_data.pivot_table(0, index=[group_col], columns="custom_gender").fillna(0)
    _gender_pivot_pct = (_gender_pivot.T / _size).T
    _gender_text = pd.DataFrame(np.apply_along_axis(lambda r: "%3d (%.2f%%)" % (r[0], r[1] * 100), 0, np.stack([_gender_pivot, _gender_pivot_pct])), index=_gender_pivot.index, columns=_gender_pivot.columns)
    _gender_text = _gender_text.rename({
        0: "Female",
        1: "Male",
    }, axis=1)
    _gender_p_value = contingency(_gender_pivot)[1]
    _gender_p = pd.Series({"p": _format_p(_gender_p_value)}, name="Sex")

    # Size
    _diameter_mean = data.groupby([group_col])["original_shape2D_MaximumDiameter"].mean()
    _diameter_std = data.groupby([group_col])["original_shape2D_MaximumDiameter"].std()
    _diameter_p_value = t_test("original_shape2D_MaximumDiameter", data, filter_col=group_col, filter_vals=groups)
    _diameter_p = _format_p(_diameter_p_value)
    _diameter = pd.concat([_diameter_mean, _diameter_std], axis=1).apply(lambda r: "%.2f ± %.2f" % (r.iloc[0], r.iloc[1]), axis=1)
    _diameter.loc["p"] = _diameter_p
    _diameter = _diameter.rename("Diameter (mm)")

    # Location
    _location_data = data.groupby(["custom_location", group_col]).size().reset_index()
    _location_pivot = _location_data.pivot_table(0, index=[group_col], columns="custom_location").fillna(0)
    _location_pivot = _location_pivot.sort_index(axis=1, key=lambda x: x.map(_location_sort))
    _location_pivot_pct = (_location_pivot.T / _size).T
    _location_text = pd.DataFrame(np.apply_along_axis(lambda r: "%3d (%.2f%%)" % (r[0], r[1] * 100), 0, np.stack([_location_pivot, _location_pivot_pct])), index=_location_pivot.index, columns=_location_pivot.columns)
    _location_text = _location_text.rename(lambda x: _location_rename.get(x, x), axis=1)
    _location_p_value = contingency(_location_pivot)[1]
    _location_p = pd.Series({"p": _format_p(_location_p_value)}, name="Location")

    # Gene mutation
    if gene_mutation:
        _gene_p = pd.Series({}, name="Gene mutation")
        _gene_values = []
        for gene, gene_col in zip(genes_of_interest, gene_cols):
            _gene_label = f"{gene[0].upper()} {gene[1][0]}"
            _gene_data = data.groupby([gene_col, group_col]).size().reset_index()
            _gene_pivot = _gene_data.pivot_table(0, index=[group_col], columns=gene_col).fillna(0)
            _gene_p_value = contingency(_gene_pivot)[1]
            _gene_data = _gene_pivot.loc[:, True]
            _gene_data_pct = (_gene_data.T / _size).T
            _gene_text = pd.Series(
                np.apply_along_axis(lambda r: "%d (%.2f%%)" % (r[0], r[1] * 100), 0, np.stack([_gene_data, _gene_data_pct])),
                name=_gene_label
            )
            _gene_text["p"] = _format_p(_gene_p_value)
            _gene_values.append(_gene_text)

        gene_col = "custom_genewt"
        _gene_data = data.groupby([gene_col, group_col]).size().reset_index()
        _gene_pivot = _gene_data.pivot_table(0, index=[group_col], columns=gene_col).fillna(0)
        _gene_p_value = contingency(_gene_pivot)[1]
        _gene_data = _gene_pivot.loc[:, True]
        _gene_data_pct = (_gene_data.T / _size).T
        _gene_text = pd.Series(
            np.apply_along_axis(lambda r: "%d (%.2f%%)" % (r[0], r[1] * 100), 0, np.stack([_gene_data, _gene_data_pct])),
            name="Wildtype"
        )
        _gene_text["p"] = _format_p(_gene_p_value)
        _gene_values.append(_gene_text)
        _gene_values = pd.concat(_gene_values, axis=1)

    results = pd.concat([results, _age, _gender_p, _gender_text, _location_p, _location_text], axis=1)
    if gene_mutation:
        results = pd.concat([results, _gene_p, _gene_values], axis=1)

    _sort = {True: 0, False: 1, "p": 2}
    results = results.sort_index(axis=0, key=lambda x: x.map(_sort))
    results.rename(index=lambda x: "%s (n = %d)" % (_name_map.get(x, x), _size[x]) if x != "p" else x, inplace=True)
    return results.T.fillna('')

baseline()
Out[9]:
Training cohort (n = 487) Validation cohort (n = 385) p
Age (years) 61.32 ± 12.01 60.63 ± 11.02 0.382
Sex 0.089
Female 238 (48.87%) 165 (42.86%)
Male 249 (51.13%) 220 (57.14%)
Location 0.201
Stomach 298 (61.19%) 258 (67.01%)
Duodenum 45 (9.24%) 38 (9.87%)
Small intestine 100 (20.53%) 63 (16.36%)
Others 44 (9.03%) 26 (6.75%)
Gene mutation
KIT 9 40 (8.21%) 32 (8.31%) 1.000
KIT 11 378 (77.62%) 275 (71.43%) 0.044
KIT 13 13 (2.67%) 10 (2.60%) 1.000
KIT 17 6 (1.23%) 7 (1.82%) 0.669
PDGFRA 12 6 (1.23%) 0 (0.00%) 0.037
PDGFRA 18 28 (5.75%) 23 (5.97%) 1.000
Wildtype 22 (4.52%) 46 (11.95%) < 0.001
In [10]:
_stomach_data = df_rad[["custom_train", "custom_location"]].copy()
_stomach_data["custom_location"] = _stomach_data["custom_location"].map(lambda x: x == "其它")
_stomach_data = _stomach_data.groupby(["custom_train", "custom_location"]).size().reset_index()
_stomach_pivot = _stomach_data.pivot_table(0, index=["custom_location"], columns="custom_train").fillna(0)
contingency(_stomach_pivot)
Out[10]:
Chi2ContingencyResult(statistic=1.222838509323792, pvalue=0.26880431192422477, dof=1, expected_freq=array([[354.0940367, 447.9059633],
       [ 30.9059633,  39.0940367]]))
In [11]:
baseline(
    data=df_rad[df_rad["custom_train"]],
    group_col="custom_d842v",
    groups=[True, False],
    group_names=["D842V-mutant", "D842V-wildtype"],
    gene_mutation=False
)
Out[11]:
D842V-mutant (n = 24) D842V-wildtype (n = 463) p
Age (years) 59.79 ± 11.91 61.40 ± 12.02 0.523
Sex 0.077
Female 7 (29.17%) 231 (49.89%)
Male 17 (70.83%) 232 (50.11%)
Location < 0.001
Stomach 21 (87.50%) 277 (59.83%)
Duodenum 1 (4.17%) 44 (9.50%)
Small intestine 0 (0.00%) 100 (21.60%)
Others 2 (8.33%) 42 (9.07%)
In [12]:
_stomach_data = df_rad[df_rad["custom_train"]][["custom_d842v", "custom_location"]].copy()
_stomach_data["custom_location"] = _stomach_data["custom_location"].map(lambda x: x == "胃")
_stomach_data = _stomach_data.groupby(["custom_d842v", "custom_location"]).size().reset_index()
_stomach_pivot = _stomach_data.pivot_table(0, index=["custom_location"], columns="custom_d842v").fillna(0)
contingency(_stomach_pivot)
Out[12]:
SignificanceResult(statistic=4.700361010830325, pvalue=0.008479680102150383)
In [13]:
baseline(
    data=df_rad[~df_rad["custom_train"]],
    group_col="custom_d842v",
    groups=[True, False],
    group_names=["D842V-mutant", "D842V-wildtype"],
    gene_mutation=False
)
Out[13]:
D842V-mutant (n = 17) D842V-wildtype (n = 368) p
Age (years) 63.53 ± 9.72 60.50 ± 11.07 0.268
Sex 0.043
Female 3 (17.65%) 162 (44.02%)
Male 14 (82.35%) 206 (55.98%)
Location < 0.001
Stomach 16 (94.12%) 242 (65.76%)
Duodenum 1 (5.88%) 37 (10.05%)
Small intestine 0 (0.00%) 63 (17.12%)
Others 0 (0.00%) 26 (7.07%)
In [14]:
_stomach_data = df_rad[~df_rad["custom_train"]][["custom_d842v", "custom_location"]].copy()
_stomach_data["custom_location"] = _stomach_data["custom_location"].map(lambda x: x == "胃")
_stomach_data = _stomach_data.groupby(["custom_d842v", "custom_location"]).size().reset_index()
_stomach_pivot = _stomach_data.pivot_table(0, index=["custom_location"], columns="custom_d842v").fillna(0)
contingency(_stomach_pivot)
Out[14]:
SignificanceResult(statistic=8.330578512396695, pvalue=0.015397786025795803)
In [15]:
_a =  baseline(
    data=df_rad[df_rad["custom_train"]],
    group_col="custom_d842v",
    groups=[True, False],
    group_names=["D842V-mutant", "D842V-wildtype"],
    gene_mutation=False
)
_b = baseline(
    data=df_rad[~df_rad["custom_train"]],
    group_col="custom_d842v",
    groups=[True, False],
    group_names=["D842V-mutant", "D842V-wildtype"],
    gene_mutation=False
)

pd.concat([_a, _b], axis=1)
Out[15]:
D842V-mutant (n = 24) D842V-wildtype (n = 463) p D842V-mutant (n = 17) D842V-wildtype (n = 368) p
Age (years) 59.79 ± 11.91 61.40 ± 12.02 0.523 63.53 ± 9.72 60.50 ± 11.07 0.268
Sex 0.077 0.043
Female 7 (29.17%) 231 (49.89%) 3 (17.65%) 162 (44.02%)
Male 17 (70.83%) 232 (50.11%) 14 (82.35%) 206 (55.98%)
Location < 0.001 < 0.001
Stomach 21 (87.50%) 277 (59.83%) 16 (94.12%) 242 (65.76%)
Duodenum 1 (4.17%) 44 (9.50%) 1 (5.88%) 37 (10.05%)
Small intestine 0 (0.00%) 100 (21.60%) 0 (0.00%) 63 (17.12%)
Others 2 (8.33%) 42 (9.07%) 0 (0.00%) 26 (7.07%)
In [16]:
baseline(
    data=df_rad[~df_rad["custom_train"]],
    group_col="custom_genewt",
    groups=[True, False],
    group_names=["KIT- and PDGFRA-wildtype", "KIT- or D842V-mutant"],
    gene_mutation=False
)
Out[16]:
KIT- and PDGFRA-wildtype (n = 46) KIT- or D842V-mutant (n = 339) p
Age (years) 61.00 ± 10.83 60.58 ± 11.06 0.809
Sex 0.098
Female 14 (30.43%) 151 (44.54%)
Male 32 (69.57%) 188 (55.46%)
Location < 0.001
Stomach 27 (58.70%) 231 (68.14%)
Duodenum 10 (21.74%) 28 (8.26%)
Small intestine 8 (17.39%) 55 (16.22%)
Others 1 (2.17%) 25 (7.37%)

ICC¶

In [17]:
#columns = [i for i in df_rad.columns if i.startswith("original") and "shape" not in i]
columns = [i for i in df_rad.columns if not i.startswith("custom") and not i.startswith("diagnostics") and not i.startswith("lbp-3D") and "shape" not in i]
#columns = [i for i in mdf_train_case.columns if i.startswith("original") or i.startswith("sq") or i.startswith("logarithm") or i.startswith("exponential") or i.startswith("gradient") and "shape" not in i]
In [18]:
# Remove all-values-were same columns
columns = np.array(columns)[np.where(df_rad[df_rad.custom_train][columns].std() != 0)]
In [19]:
len(columns)
Out[19]:
978
In [20]:
# Use ICC to select columns
cache_intra_file = f"cache/intra.txt"

if os.path.exists(cache_intra_file):
    with open(cache_intra_file, "r") as f:
        available_intra = f.read().split()
else:
    origin = pd.read_csv("results-ct4phase-renji-anyty-resampled-2024-04-29.csv", index_col="custom_studyId")
    intra = pd.read_csv("results-ct4phase-renji-anyty-resampled-intraobserver-2024-05-02.csv", index_col="custom_studyId")
    origin = origin.loc[origin.custom_phase == "v"]
    intra = intra.loc[intra.custom_phase == "v"]
    _intra = intra.copy()
    _intra["rater"] = 0
    _intra["target"] = _intra.index.values
    _origin = origin.loc[_intra.index.values, columns]
    _origin["rater"] = 1
    _origin["target"] = _origin.index.values
    _intra_all = pd.concat([_intra, _origin], ignore_index=True, axis=0)
    
    available_intra = []
    for c in columns:
        # Two-way random effects, absolute agreement, single rater/measurement
        icc = pg.intraclass_corr(_intra_all, targets="target", raters="rater", ratings=c)
        icc = icc.set_index("Type").loc["ICC2", "ICC"]
        if icc > 0.75:
            available_intra.append(c)

    with open(cache_intra_file, "w") as f:
        f.write("\n".join(available_intra))

print(f"Available columns (intraobserver): {len(available_intra)} / {len(columns)}")
Available columns (intraobserver): 702 / 978
In [21]:
# Use ICC to select columns
cache_inter_file = f"cache/inter.txt"

if os.path.exists(cache_inter_file):
    with open(cache_inter_file, "r") as f:
        available_inter = f.read().split()
else:
    origin = pd.read_csv("results-ct4phase-renji-anyty-resampled-2024-04-29.csv", index_col="custom_studyId")
    inter = pd.read_csv("results-ct4phase-renji-anyty-resampled-interobserver-smooth-2024-05-26.csv", index_col="custom_studyId")
    origin = origin.loc[origin.custom_phase == "v"]
    inter = inter.loc[intra.custom_phase == "v"]
    _inter = inter.copy()
    _inter["rater"] = 0
    _inter["target"] = _inter.index.values
    _origin = origin.loc[_inter.index.values, columns]
    _origin["rater"] = 1
    _origin["target"] = _origin.index.values
    _inter_all = pd.concat([_inter, _origin], ignore_index=True, axis=0)
    
    available_inter = []
    for c in columns:
        # Two-way random effects, absolute agreement, single rater/measurement
        icc = pg.intraclass_corr(_inter_all, targets="target", raters="rater", ratings=c)
        icc = icc.set_index("Type").loc["ICC2", "ICC"]
        if icc > 0.75:
            available_inter.append(c)

    with open(cache_inter_file, "w") as f:
        f.write("\n".join(available_inter))

print(f"Available columns (interobserver): {len(available_inter)} / {len(columns)}")
Available columns (interobserver): 646 / 978
In [22]:
# Filter columns with ICC
num_all_columns = len(columns)
columns = sorted(list(set(available_intra).intersection(set(available_inter))))

print(f"Available columns (all): {len(columns)} / {num_all_columns}")
Available columns (all): 601 / 978

Feature selection¶

In [23]:
RANDOM_STATE=11
In [24]:
class CustomOpt:
    def __init__(self, model):
        self.best_estimator_ = model

clin_columns = ["custom_age", "custom_gender", "original_shape2D_MaximumDiameter", "custom_is-gastric"]
columns_to_use = list(columns) + clin_columns[:-1]

X_train = df_rad[df_rad.custom_train]
y_train = X_train["custom_d842v"]
X_test = df_rad[~df_rad.custom_train]
y_test = X_test["custom_d842v"]
X_train = pd.concat([X_train[columns_to_use], pd.Series(X_train["custom_location"] == "胃", name="custom_is-gastric")], axis=1)
X_test = pd.concat([X_test[columns_to_use], pd.Series(X_test["custom_location"] == "胃", name="custom_is-gastric")], axis=1)
In [25]:
max_nodes = 20
num_trees = 10

sm = RandomUnderSampler(random_state=RANDOM_STATE)
X_train_sm, y_train_sm = sm.fit_resample(X_train[columns], y_train)
rf = RandomForestClassifier(num_trees, random_state=RANDOM_STATE, max_depth=2)
rf.fit(X_train_sm, y_train_sm)
impRF = rf.feature_importances_ / rf.feature_importances_.max()

r = pyper.R(use_pandas='True')
r.assign("x_train", X_train_sm.values)
r.assign("y_train", y_train_sm.values)
r.assign("seed", RANDOM_STATE)
r.assign("ntree", num_trees)
r.assign("max_nodes", max_nodes)
r.assign("coefReg", 0.5)
r.assign("imp", impRF)
r("""
library(RRF);set.seed(seed)
gamma <- 1
coefReg <- (1-gamma)+gamma*imp
grf <- RRF(x_train,as.factor(y_train), ntree=ntree, max_nodes=max_nodes, flagReg = 1, max_nodes=max_nodes, coefReg=coefReg)
""")
best_features = np.array(columns)[r.get("grf$feaSet") - 1].tolist()
if isinstance(best_features, str):
    best_features = [best_features]

best_features
Out[25]:
['squareroot_firstorder_RobustMeanAbsoluteDeviation',
 'wavelet-HL_gldm_GrayLevelNonUniformity']
In [26]:
class CustomOpt:
    def __init__(self, model):
        self.best_estimator_ = model

columns_to_use = best_features + clin_columns

model = RandomForestClassifier(50, random_state=RANDOM_STATE, class_weight="balanced", max_depth=2)
model.fit(X_train[columns_to_use], y_train)

opt = CustomOpt(model)
In [27]:
y_pred_train = opt.best_estimator_.predict_proba(X_train[columns_to_use])[:, 1]
y_pred_test = opt.best_estimator_.predict_proba(X_test[columns_to_use])[:, 1]

train_results = {
    "pred_radioclinical": y_pred_train,
    "label": y_train,
}
test_results = {
    "pred_radioclinical": y_pred_test,
    "label": y_test,
}
models = {
    "radioclinical": model,
}

_results = {
    "AUC (train)": roc_auc_score(y_train, y_pred_train),
    "AUC (test)": roc_auc_score(y_test, y_pred_test),
    "AP (train)": average_precision_score(y_train, y_pred_train),
    "AP (test)": average_precision_score(y_test, y_pred_test),
}

fpr_train, tpr_train, thresholds_train = roc_curve(y_train, y_pred_train)

_results.update({
    "Confusion Matrix (train, 0.5)": confusion_matrix(y_train, y_pred_train > 0.5),
    "Confusion Matrix (test, 0.5)": confusion_matrix(y_test, y_pred_test > 0.5),
    "G-mean (train, 0.5)": geometric_mean_score(y_train, y_pred_train > 0.5),
    "G-mean (test, 0.5)": geometric_mean_score(y_test, y_pred_test > 0.5),
    "F1-score (train, 0.5)": f1_score(y_train, y_pred_train > 0.5),
    "F1-score (test, 0.5)": f1_score(y_test, y_pred_test > 0.5),
})

_results
Out[27]:
{'AUC (train)': 0.8877339812814975,
 'AUC (test)': 0.7283407928388747,
 'AP (train)': 0.42259147584162193,
 'AP (test)': 0.2502574414224302,
 'Confusion Matrix (train, 0.5)': array([[380,  83],
        [  7,  17]]),
 'Confusion Matrix (test, 0.5)': array([[309,  59],
        [  6,  11]]),
 'G-mean (train, 0.5)': 0.7624654036221331,
 'G-mean (test, 0.5)': 0.737101359598699,
 'F1-score (train, 0.5)': 0.27419354838709675,
 'F1-score (test, 0.5)': 0.25287356321839083}
In [28]:
results_train = permutation_importance(opt.best_estimator_, X_train[columns_to_use], y_train, scoring=lambda est, x, y: roc_auc_score(y, est.predict_proba(x)[:, 1]), random_state=RANDOM_STATE, n_jobs=-1)
results_test = permutation_importance(opt.best_estimator_, X_test[columns_to_use], y_test, scoring=lambda est, x, y: roc_auc_score(y, est.predict_proba(x)[:, 1]), random_state=RANDOM_STATE, n_jobs=-1)

_fi = pd.DataFrame(
    np.array([columns_to_use, results_train["importances_mean"], results_test["importances_mean"]]).T,
    columns=["Column", "Permutation Importance (train)", "Permutation Importance (test)"]
).sort_values("Permutation Importance (train)", ascending=False).set_index("Column")

# Plot train
sorted_importances_idx = results_train.importances_mean.argsort()
importances_train = pd.DataFrame(
    results_train.importances[sorted_importances_idx].T,
    columns=np.array(columns_to_use)[sorted_importances_idx],
)
ax = importances_train.plot(kind="box", vert=False, whis=10, figsize=(8, 6))
ax.set_title(f"Permutation Importances")
ax.axvline(x=0, color="k", linestyle="--")
ax.set_xlabel("Decrease in accuracy score")
ax.figure.tight_layout()
No description has been provided for this image

Alternative models¶

In [29]:
X_train_clin = X_train[clin_columns]
X_test_clin = X_test[clin_columns]

model_clinical = RandomForestClassifier(50, random_state=RANDOM_STATE, class_weight='balanced', max_depth=2)
model_clinical.fit(X_train_clin, y_train)
models["clinical"] = model_clinical
opt = CustomOpt(model_clinical)
In [30]:
y_pred_train = opt.best_estimator_.predict_proba(X_train_clin)[:, 1]
y_pred_test = opt.best_estimator_.predict_proba(X_test_clin)[:, 1]

train_results["pred_clinical"] = y_pred_train
test_results["pred_clinical"] = y_pred_test

_results = {
    "AUC (train)": roc_auc_score(y_train, y_pred_train),
    "AUC (test)": roc_auc_score(y_test, y_pred_test),
    "AP (train)": average_precision_score(y_train, y_pred_train),
    "AP (test)": average_precision_score(y_test, y_pred_test),
}

fpr_train, tpr_train, thresholds_train = roc_curve(y_train, y_pred_train)

_results.update({
    "Confusion Matrix (train, 0.5)": confusion_matrix(y_train, y_pred_train > 0.5),
    "Confusion Matrix (test, 0.5)": confusion_matrix(y_test, y_pred_test > 0.5),
    "G-mean (train, 0.5)": geometric_mean_score(y_train, y_pred_train > 0.5),
    "G-mean (test, 0.5)": geometric_mean_score(y_test, y_pred_test > 0.5),
    "F1-score (train, 0.5)": f1_score(y_train, y_pred_train > 0.5),
    "F1-score (test, 0.5)": f1_score(y_test, y_pred_test > 0.5),
})

_results
Out[30]:
{'AUC (train)': 0.8295536357091433,
 'AUC (test)': 0.7372122762148338,
 'AP (train)': 0.371304517613862,
 'AP (test)': 0.10178893871520615,
 'Confusion Matrix (train, 0.5)': array([[362, 101],
        [  6,  18]]),
 'Confusion Matrix (test, 0.5)': array([[242, 126],
        [  5,  12]]),
 'G-mean (train, 0.5)': 0.7657630759921216,
 'G-mean (test, 0.5)': 0.6813181146876506,
 'F1-score (train, 0.5)': 0.2517482517482518,
 'F1-score (test, 0.5)': 0.15483870967741936}
In [31]:
X_train_radiol = X_train[best_features]
X_test_radiol = X_test[best_features]

model_radiol = RandomForestClassifier(50, random_state=RANDOM_STATE, class_weight='balanced', max_depth=2)
models["radiol"] = model_radiol
model_radiol.fit(X_train_radiol, y_train)
opt = CustomOpt(model_radiol)
In [32]:
y_pred_train = opt.best_estimator_.predict_proba(X_train_radiol)[:, 1]
y_pred_test = opt.best_estimator_.predict_proba(X_test_radiol)[:, 1]

train_results["pred_radiol"] = y_pred_train
test_results["pred_radiol"] = y_pred_test

_results = {
    "AUC (train)": roc_auc_score(y_train, y_pred_train),
    "AUC (test)": roc_auc_score(y_test, y_pred_test),
    "AP (train)": average_precision_score(y_train, y_pred_train),
    "AP (test)": average_precision_score(y_test, y_pred_test),
}

fpr_train, tpr_train, thresholds_train = roc_curve(y_train, y_pred_train)

_results.update({
    "Confusion Matrix (train, 0.5)": confusion_matrix(y_train, y_pred_train > 0.5),
    "Confusion Matrix (test, 0.5)": confusion_matrix(y_test, y_pred_test > 0.5),
    "G-mean (train, 0.5)": geometric_mean_score(y_train, y_pred_train > 0.5),
    "G-mean (test, 0.5)": geometric_mean_score(y_test, y_pred_test > 0.5),
    "F1-score (train, 0.5)": f1_score(y_train, y_pred_train > 0.5),
    "F1-score (test, 0.5)": f1_score(y_test, y_pred_test > 0.5),
})

_results
Out[32]:
{'AUC (train)': 0.8492170626349891,
 'AUC (test)': 0.6289162404092071,
 'AP (train)': 0.30167936684841057,
 'AP (test)': 0.10219058704692797,
 'Confusion Matrix (train, 0.5)': array([[361, 102],
        [  5,  19]]),
 'Confusion Matrix (test, 0.5)': array([[300,  68],
        [ 11,   6]]),
 'G-mean (train, 0.5)': 0.7856593531235201,
 'G-mean (test, 0.5)': 0.5363989048891137,
 'F1-score (train, 0.5)': 0.2620689655172414,
 'F1-score (test, 0.5)': 0.13186813186813187}

Plots¶

In [33]:
class AttributeDict(dict):
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

# color palettes
C = AttributeDict({
    "blue": "#81A4CD",
    "yellow": "#D7B377",
    "red": "#E27893",
    "pink": "#E07B94",
    "purple": "#A69BBF",
})
C.update({
    "darkblue": "#5F8CBF",
    "lightblue": "#C5D5E8",
    "darkpink": "#DC6A87",
})

Demostration of undersampling

In [34]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import plotly.graph_objects as go
import plotly.express as px

def _gene_mutation(x):
    for gene_col in gene_cols:
        if x[gene_col]:
            return gene_col
    return "custom_genewt"

lda = LinearDiscriminantAnalysis(n_components=2)

_df = df_rad[df_rad.custom_train]
_gene = _df.apply(_gene_mutation, axis=1)

_df = pd.DataFrame(lda.fit_transform(_df[columns], _gene), columns=["axis-0", "axis-1"], index=X_train.index)
_df.loc[:, "label"] = y_train
_df.loc[:, "selected"] = 0
_df.loc[X_train_sm.index, "selected"] = 1
_df.loc[:, "label_selected"] = _df.loc[:, "label"].map(lambda x: x << 1) + _df.loc[:, "selected"]
_df.loc[:, "color"] = _df.loc[:, "label_selected"].map({
    0b00: C.lightblue,
    0b01: C.darkblue,
    0b10: C.red,
    0b11: C.darkpink,
})
_df.loc[:, "axis-1"] += 2

fig = px.scatter(_df, x="axis-0", y="axis-1", color="color", color_discrete_map="identity")
fig.update_layout(
    template="plotly_white",
    xaxis_title='',
    yaxis_title='',
    xaxis=dict(range=(-5, 5), showticklabels=False),
    yaxis=dict(range=(-5.5, 5.5), scaleanchor="x", showticklabels=False, tickvals=(-5, 0, 5)),
    width=300, height=300,
    margin=dict(t=5, b=5, l=5, r=5),
)
#fig.write_image("figures/figure-2_3_1.svg")
fig.show()

Demostration of feature selection

In [35]:
_pi = permutation_importance(
    rf, X_train_sm, y_train_sm, random_state=RANDOM_STATE
)
_index = np.argsort(_pi.importances_mean)
_df = pd.DataFrame({
    "imp": _pi.importances_mean[_index[-10:]],
    "err": _pi.importances_std[_index[-10:]],
})
fig = px.bar(_df, x="imp", error_x="err")
fig.update_layout(
    template="plotly_white",
    xaxis_title='',
    yaxis_title='',
    xaxis=dict(showticklabels=False),
    yaxis=dict(showticklabels=False),
    width=150, height=300,
    margin=dict(t=5, b=5, l=5, r=5),
)
#fig.write_image("figures/figure-2_3_2.svg")
fig.show()

ROC Plot

In [36]:
from sklearn.metrics import auc
import plotly.graph_objects as go
import plotly.express as px

fig = go.Figure()
fig.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)

# The histogram of scores compared to true labels
fpr, tpr, thresholds = roc_curve(train_results["label"], train_results["pred_clinical"])
_auc = auc(fpr, tpr)
fig.add_trace(go.Scatter(x=fpr, y=tpr, mode="lines", line_color=C.blue, line_dash="dot",
    name=f"Training cohort (clinical, AUC={'%.2f' % _auc})",
))

fpr, tpr, thresholds = roc_curve(test_results["label"], test_results["pred_clinical"])
_auc = auc(fpr, tpr)
fig.add_trace(go.Scatter(x=fpr, y=tpr, mode="lines", line_color=C.blue,
    name=f"Validation cohort (clinical, AUC={'%.2f' % _auc})",
))

fpr, tpr, thresholds = roc_curve(train_results["label"], train_results["pred_radioclinical"])
_auc = auc(fpr, tpr)
fig.add_trace(go.Scatter(x=fpr, y=tpr, mode="lines", line_color=C.red, line_dash="dot",
    name=f"Training cohort (clinical+radiomics, AUC={'%.2f' % _auc})",
))

fpr, tpr, thresholds = roc_curve(test_results["label"], test_results["pred_radioclinical"])
_auc = auc(fpr, tpr)
fig.add_trace(go.Scatter(x=fpr, y=tpr, mode="lines", line_color=C.red,
    name=f"Validation cohort (clinical+radiomics, AUC={'%.2f' % _auc})",
))

fig.update_layout(
    template="plotly_white",
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    yaxis=dict(scaleanchor="x", scaleratio=1),
    xaxis=dict(constrain='domain'),
    width=600, height=600,
    legend=dict(y=0.05, x=0.42, xref="paper", traceorder='reversed', font_size=12),
    margin=dict(t=5, b=5, l=5, r=5),
)
#fig.write_image("figures/figure-3.png", width=600, height=600, scale=5)
fig.show()
In [37]:
fig.update_layout(
    showlegend=False,
    xaxis_title='',
    yaxis_title='',
    xaxis=dict(showticklabels=False),
    yaxis=dict(showticklabels=False),
    width=300, height=300,
)
#fig.write_image("figures/figure-2_4_1.svg")
fig.show()

PRC Plot

In [38]:
# from sklearn.metrics import auc
import plotly.graph_objects as go
import plotly.express as px

chance_level = 17 / 385
fig = go.Figure()
fig.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=chance_level, y1=chance_level,
)

# The histogram of scores compared to true labels
precision, recall, _ = precision_recall_curve(train_results["label"], train_results["pred_clinical"])
_ap = average_precision_score(train_results["label"], train_results["pred_clinical"])
fig.add_trace(go.Scatter(x=recall, y=precision, mode="lines", line_color=C.blue, line_dash="dot", line_shape="hv",
    name=f"Training cohort (clinical, AP={_ap:.2f})",
))

precision, recall, _ = precision_recall_curve(test_results["label"], test_results["pred_clinical"])
_ap = average_precision_score(test_results["label"], test_results["pred_clinical"])
fig.add_trace(go.Scatter(x=recall, y=precision, mode="lines", line_color=C.blue, line_shape="hv",
    name=f"Validation cohort (clinical, AP={_ap:.2f})",
))

precision, recall, _ = precision_recall_curve(train_results["label"], train_results["pred_radioclinical"])
_ap = average_precision_score(train_results["label"], train_results["pred_radioclinical"])
fig.add_trace(go.Scatter(x=recall, y=precision, mode="lines", line_color=C.red, line_dash="dot", line_shape="hv",
    name=f"Training cohort (clinical+radiomics, AP={_ap:.2f})",
))

precision, recall, _ = precision_recall_curve(test_results["label"], test_results["pred_radioclinical"])
_ap = average_precision_score(test_results["label"], test_results["pred_radioclinical"])
fig.add_trace(go.Scatter(x=recall, y=precision, mode="lines", line_color=C.red, line_shape="hv",
    name=f"Validation cohort (clinical+radiomics, AP={_ap:.2f})",
))

fig.update_layout(
    template="plotly_white",
    xaxis_title='Recall',
    yaxis_title='Precision',
    yaxis=dict(scaleanchor="x", scaleratio=1),
    xaxis=dict(constrain='domain'),
    width=600, height=600,
    legend=dict(y=0.95, x=0.42, xref="paper", traceorder='reversed', font_size=12),
    margin=dict(t=5, b=5, l=5, r=5),
)
#fig.write_image("figures/figure-4.png", scale=5)
fig.show()
In [39]:
fig.update_layout(
    showlegend=False,
    xaxis_title='',
    yaxis_title='',
    xaxis=dict(showticklabels=False),
    yaxis=dict(showticklabels=False),
    width=300, height=300,
)
#fig.write_image("figures/figure-2_4_2.svg")
fig.show()

Scatter

In [40]:
import plotly.express as px

_df = pd.concat([X_train, y_train.rename("D842V mutation")], axis=1)
#_df = _df[np.logical_and(X_train["custom_is-gastric"], X_train["custom_gender"] == 1)]
_df = _df[X_train["custom_is-gastric"]]
#_data = _df[best_features].copy()
#_data[clin_columns] = np.nan
_data = _df[best_features + clin_columns]
_df["prediction"] = models["radioclinical"].predict_proba(_data)[:, 1]

fig = px.scatter(_df, *best_features[:2], color="prediction", color_continuous_scale=px.colors.sequential.RdBu_r,
                 symbol="D842V mutation", symbol_sequence=["circle-open", "circle"],
                 log_x=True, log_y=True)

fig.update_layout(
    template="plotly_white",
    width=600, height=600,
    coloraxis_colorbar_y = 0.4,
    coloraxis_colorbar_len = 0.8,
)
#fig.write_image("figures/figure-5a.png", scale=5)
fig
In [41]:
import plotly.express as px

_df = pd.concat([X_test, y_test.rename("D842V mutation")], axis=1)
#_df = _df[np.logical_and(X_test["custom_is-gastric"], X_test["custom_gender"] == 1)]
_df = _df[X_test["custom_is-gastric"]]
#_data = _df[best_features].copy()
#_data[clin_columns] = np.nan
_data = _df[best_features + clin_columns]
_df["prediction"] = models["radioclinical"].predict_proba(_data)[:, 1]

fig = px.scatter(_df, *best_features[:2], color="prediction", color_continuous_scale=px.colors.sequential.RdBu_r,
                 symbol="D842V mutation", symbol_sequence=["circle-open", "circle"],
                 log_x=True, log_y=True)

fig.update_layout(
    template="plotly_white",
    width=600, height=600,
    coloraxis_colorbar_y = 0.4,
    coloraxis_colorbar_len = 0.8,
)
#fig.write_image("figures/figure-5b.png", scale=5)
fig

Visualization of texture features

In [42]:
from scipy import ndimage

def show_image(im, seg=None, edge=False, pos=100, win=400, alpha=0.2, seg_cmap="flag_r"):
    low = pos - win / 2
    high = pos + win/2 / 2

    fig = plt.figure(figsize=(8, 8))
    plt.imshow((np.clip(im, low, high) - low) / (high - low) , cmap="gray")

    if seg is not None:
        if edge:
            seg = ndimage.binary_dilation(seg, iterations=edge) - seg
        plt.imshow(seg, alpha=alpha, cmap=seg_cmap)
In [43]:
sub001 = np.load("sample_data/sub001.npz")
In [44]:
show_image(sub001["im"][50:300, 150:400], sub001["seg"][50:300, 150:400], edge=2)
plt.axis("off")
plt.tight_layout()
#plt.savefig("figures/figure-2_2_1.png")
No description has been provided for this image
In [45]:
from skimage.feature import graycomatrix, graycoprops

_window = 4
_im = sub001["im"][50:300, 150:400]
_im = (np.clip(_im, -50, 269) / 5 + 10).astype(int) # Normalized
_seg = sub001["seg"][50:300, 150:400]

_dissimilarity = np.zeros_like(_im)
_contrast = np.zeros_like(_im)

# tumor location 125:215 230:320
for x in range(75, 165):
    for y in range(80, 170):
        _patch = _im[x - _window:x + _window, y - _window:y + _window]
        glcm = graycomatrix(_patch, distances=[5], angles=[0], levels=64, normed=True)
        _dissimilarity[x, y] = graycoprops(glcm, 'dissimilarity').item()
        _contrast[x, y] = graycoprops(glcm, 'contrast').item()
In [46]:
show_image(_im, np.where(_seg, _dissimilarity, 0), alpha=0.3, seg_cmap="gist_ncar")

plt.axis("off")
plt.tight_layout()
#plt.savefig("figures/figure-2_2_2.png")
No description has been provided for this image

Compare results of different models¶

In [47]:
all_results = pd.DataFrame([], columns=["C", "C-CI", "CR", "CR-CI", "p"])

def _format_ci(ci):
    return f"[{ci.confidence_interval.low:.3f}, {ci.confidence_interval.high:.3f}]"
In [48]:
b1 = S.bootstrap(
    (test_results["label"].values, test_results["pred_radioclinical"]),
    lambda x, y: average_precision_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
b2 = S.bootstrap(
    (test_results["label"].values, test_results["pred_clinical"]),
    lambda x, y: average_precision_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
all_results.loc["AP", "CR"] = '%.3f' % average_precision_score(test_results["label"].values, test_results["pred_radioclinical"])
all_results.loc["AP", "C"] = '%.3f' % average_precision_score(test_results["label"].values, test_results["pred_clinical"])
all_results.loc["AP", "CR-CI"] = _format_ci(b1)
all_results.loc["AP", "C-CI"] = _format_ci(b2)
b1.confidence_interval, b2.confidence_interval
Out[48]:
(ConfidenceInterval(low=0.09718711741106247, high=0.48610045048951395),
 ConfidenceInterval(low=0.04489619284529515, high=0.19190377849620294))
In [49]:
_auc_diffs = average_precision_score(test_results["label"].values, test_results["pred_clinical"]) - average_precision_score(test_results["label"].values, test_results["pred_radioclinical"])
_diffs = np.std(b1.bootstrap_distribution - b2.bootstrap_distribution)
_p = 2 * S.norm.cdf(-np.abs(_auc_diffs / _diffs))
all_results.loc["AP", "p"] = "%.3f" % _p
_p
Out[49]:
0.038661547824375
In [50]:
S.norm.cdf(_auc_diffs / _diffs)
Out[50]:
0.0193307739121875
In [51]:
b1 = S.bootstrap(
    (test_results["label"].values, test_results["pred_radioclinical"]),
    lambda x, y: pr_auc_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
b2 = S.bootstrap(
    (test_results["label"].values, test_results["pred_clinical"]),
    lambda x, y: pr_auc_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
all_results.loc["PR-AUC", "CR"] = '%.3f' % pr_auc_score(test_results["label"].values, test_results["pred_radioclinical"])
all_results.loc["PR-AUC", "C"] = '%.3f' % pr_auc_score(test_results["label"].values, test_results["pred_clinical"])
all_results.loc["PR-AUC", "CR-CI"] = _format_ci(b1)
all_results.loc["PR-AUC", "C-CI"] = _format_ci(b2)
b1.confidence_interval, b2.confidence_interval
Out[51]:
(ConfidenceInterval(low=0.08933297505550315, high=0.49689877773021673),
 ConfidenceInterval(low=0.04815144673097113, high=0.18361821584146662))
In [52]:
_auc_diffs = pr_auc_score(test_results["label"].values, test_results["pred_clinical"]) - pr_auc_score(test_results["label"].values, test_results["pred_radioclinical"])
_diffs = np.std(b1.bootstrap_distribution - b2.bootstrap_distribution)
_p = 2 * S.norm.cdf(-np.abs(_auc_diffs / _diffs))
all_results.loc["PR-AUC", "p"] = "%.3f" % _p
_p
Out[52]:
0.05672167924734836
In [53]:
S.norm.cdf(_auc_diffs / _diffs)
Out[53]:
0.02836083962367418
In [54]:
b1 = S.bootstrap(
    (test_results["label"].values, test_results["pred_radioclinical"] > 0.5),
    lambda x, y: geometric_mean_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
b2 = S.bootstrap(
    (test_results["label"].values, test_results["pred_clinical"] > 0.5),
    lambda x, y: geometric_mean_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
all_results.loc["G-Mean", "CR"] = '%.3f' % geometric_mean_score(test_results["label"].values, test_results["pred_radioclinical"] > 0.5)
all_results.loc["G-Mean", "C"] = '%.3f' % geometric_mean_score(test_results["label"].values, test_results["pred_clinical"] > 0.5)
all_results.loc["G-Mean", "CR-CI"] = _format_ci(b1)
all_results.loc["G-Mean", "C-CI"] = _format_ci(b2)
b1.confidence_interval, b2.confidence_interval
Out[54]:
(ConfidenceInterval(low=0.5735282982734137, high=0.8597941520462185),
 ConfidenceInterval(low=0.5417404954690205, high=0.7764762167538287))
In [55]:
_auc_diffs = geometric_mean_score(test_results["label"].values, test_results["pred_clinical"] > 0.5) - geometric_mean_score(test_results["label"].values, test_results["pred_radioclinical"] > 0.5)
_diffs = np.std(b1.bootstrap_distribution - b2.bootstrap_distribution)
_p = 2 * S.norm.cdf(-np.abs(_auc_diffs / _diffs))
all_results.loc["G-Mean", "p"] = "%.3f" % _p
_p
Out[55]:
0.3517213015037173
In [56]:
b1 = S.bootstrap(
    (test_results["label"].values, test_results["pred_radioclinical"] > 0.5),
    lambda x, y: f1_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
b2 = S.bootstrap(
    (test_results["label"].values, test_results["pred_clinical"] > 0.5),
    lambda x, y: f1_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
all_results.loc["F1", "CR"] = '%.3f' % f1_score(test_results["label"].values, test_results["pred_radioclinical"] > 0.5)
all_results.loc["F1", "C"] = '%.3f' % f1_score(test_results["label"].values, test_results["pred_clinical"] > 0.5)
all_results.loc["F1", "CR-CI"] = _format_ci(b1)
all_results.loc["F1", "C-CI"] = _format_ci(b2)
b1.confidence_interval, b2.confidence_interval
Out[56]:
(ConfidenceInterval(low=0.14285714285714285, high=0.40010994310165887),
 ConfidenceInterval(low=0.08763832817918717, high=0.25391171325290435))
In [57]:
_auc_diffs = f1_score(test_results["label"].values, test_results["pred_clinical"] > 0.5) - f1_score(test_results["label"].values, test_results["pred_radioclinical"] > 0.5)
_diffs = np.std(b1.bootstrap_distribution - b2.bootstrap_distribution)
_p = 2 * S.norm.cdf(-np.abs(_auc_diffs / _diffs))
all_results.loc["F1", "p"] = "%.3f" % _p
_p
Out[57]:
0.011721603527162703
In [58]:
b1 = S.bootstrap(
    (test_results["label"].values, test_results["pred_radioclinical"]),
    lambda x, y: roc_auc_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
b2 = S.bootstrap(
    (test_results["label"].values, test_results["pred_clinical"]),
    lambda x, y: roc_auc_score(x, y),
    n_resamples=1000,
    paired=True,
    random_state=RANDOM_STATE,
)
all_results.loc["ROC-AUC", "CR"] = '%.3f' % roc_auc_score(test_results["label"].values, test_results["pred_radioclinical"])
all_results.loc["ROC-AUC", "C"] = '%.3f' % roc_auc_score(test_results["label"].values, test_results["pred_clinical"])
all_results.loc["ROC-AUC", "CR-CI"] = _format_ci(b1)
all_results.loc["ROC-AUC", "C-CI"] = _format_ci(b2)
b1.confidence_interval, b2.confidence_interval
Out[58]:
(ConfidenceInterval(low=0.5156763405069147, high=0.8612384006557819),
 ConfidenceInterval(low=0.6092629886543749, high=0.8330584116992429))
In [59]:
_auc_diffs = roc_auc_score(test_results["label"].values, test_results["pred_clinical"]) - roc_auc_score(test_results["label"].values, test_results["pred_radioclinical"])
_diffs = np.std(b1.bootstrap_distribution - b2.bootstrap_distribution)
_p = 2 * S.norm.cdf(-np.abs(_auc_diffs / _diffs))
all_results.loc["ROC-AUC", "p"] = "%.3f" % _p
_p
Out[59]:
0.8360709706806619
In [60]:
from sklearn.metrics import accuracy_score

tn, fp, fn, tp = confusion_matrix(test_results["label"].values, test_results["pred_radioclinical"] > 0.5).ravel() 
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
accuracy = accuracy_score(test_results["label"].values, test_results["pred_radioclinical"] > 0.5)
all_results.loc["Sensitivity", "CR"] = '%.3f' % sensitivity
all_results.loc["Specificity", "CR"] = '%.3f' % specificity
all_results.loc["Accuracy", "CR"] = '%.3f' % accuracy

tn, fp, fn, tp = confusion_matrix(test_results["label"].values, test_results["pred_clinical"] > 0.5).ravel() 
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
accuracy = accuracy_score(test_results["label"].values, test_results["pred_clinical"] > 0.5)
all_results.loc["Sensitivity", "C"] = '%.3f' % sensitivity
all_results.loc["Specificity", "C"] = '%.3f' % specificity
all_results.loc["Accuracy", "C"] = '%.3f' % accuracy
In [61]:
all_results.rename(columns={
    "C": "clinical",
    "C-CI": "clinical (95\\% CI)",
    "CR": "clinical+radiomics",
    "CR-CI": "clinical+radiomics (95\\% CI)",
    "p": "$p$",
}, inplace=True)
all_results
Out[61]:
clinical clinical (95\% CI) clinical+radiomics clinical+radiomics (95\% CI) $p$
AP 0.102 [0.045, 0.192] 0.250 [0.097, 0.486] 0.039
PR-AUC 0.094 [0.048, 0.184] 0.242 [0.089, 0.497] 0.057
G-Mean 0.681 [0.542, 0.776] 0.737 [0.574, 0.860] 0.352
F1 0.155 [0.088, 0.254] 0.253 [0.143, 0.400] 0.012
ROC-AUC 0.737 [0.609, 0.833] 0.728 [0.516, 0.861] 0.836
Sensitivity 0.706 NaN 0.647 NaN NaN
Specificity 0.658 NaN 0.840 NaN NaN
Accuracy 0.660 NaN 0.831 NaN NaN